library(here)
library(ggplot2)
library(plotly)
library(tidyverse)
library(treemapify)
# Pin knitr's root directory to the project root located by here(), then echo
# the configured value so it shows up in the rendered document.
curr.dir <- here()
knitr::opts_knit$set(root.dir = curr.dir)
knitr::opts_knit$get("root.dir")
## [1] "/Users/luischavesrodriguez/OneDrive - Imperial College London/ExtratimeWork/COVID19"

Up-to-date exploration by date

# Load the case records and derive the active case count of every record
# (confirmed minus deaths minus recoveries).
up.to.date <- read.csv("NovelCOVID/covid_19_data.csv")
up.to.date$Active <- with(up.to.date, Confirmed - Deaths - Recovered)

# Country -> continent lookup, extended with the country spellings that are
# appended by hand below (five Asian and two European entries).
countryToContinent <- read.csv("Countries-Continents.csv")
countryToContinent <- rbind(
  countryToContinent,
  data.frame(
    Continent = rep(c("Asia", "Europe"), times = c(5, 2)),
    Country = c(
      "Hong Kong", "Mainland China", "Macau", "Taiwan", "South Korea",
      "UK", "Czech Republic"
    )
  )
)
countryToContinent$Continent <- as.character(countryToContinent$Continent)
countryToContinent$Country <- as.character(countryToContinent$Country)

# Left join: every case record is kept; records whose country is not in the
# lookup end up with an NA continent.
up.to.date <- merge(
  up.to.date,
  countryToContinent,
  by.x = "Country.Region",
  by.y = "Country",
  all.x = TRUE,
  sort = FALSE
)

Summary by region

The table below lists the 20 regions ranked highest by active cases; the plots that follow are built from the same summary:

# Per-region summary, sorted by active cases (largest first).
#
# Confirmed, Deaths and Recovered are cumulative counts, so (assuming they
# never decrease) their per-region maximum is the latest total.
# Active is NOT cumulative: it falls as cases resolve, so max(Active) would
# report the historical peak and yield rows where
# Active != Confirmed - Deaths - Recovered. It is therefore derived from the
# three cumulative totals after summarising.
sum.table <- up.to.date %>%
  select(-c(Last.Update, SNo, ObservationDate)) %>%
  group_by(Continent, Country.Region, Province.State) %>%
  summarise(
    Confirmed = max(Confirmed),
    Deaths = max(Deaths),
    Recovered = max(Recovered)
  ) %>%
  mutate(Active = Confirmed - Deaths - Recovered) %>%
  arrange(-Active)

# Show the 20 regions with the most active cases.
knitr::kable(sum.table %>% head(20))
Continent Country.Region Province.State Confirmed Deaths Recovered Active
Europe Italy 80589 8215 10361 62013
Asia Mainland China Hubei 67801 3169 61201 50633
Europe Spain 57786 4365 7015 46406
Europe Germany 43938 267 5673 37998
North America US New York 37877 385 0 37492
Europe France 29155 1696 4948 22511
Europe France French Polynesia 19874 860 2200 16814
Asia Iran 29406 2234 10457 16715
Europe France France 14282 562 12 13708
Europe Switzerland 11811 191 131 11489
Europe UK 11658 578 135 10945
Asia South Korea 9241 131 4144 7577
Europe Netherlands 7431 434 3 6994
North America US New Jersey 6876 81 1 6795
Europe Austria 6909 49 112 6748
Europe Belgium 6235 220 675 5340
Europe UK United Kingdom 5018 233 65 4720
North America US California 3899 81 6 3818
Asia Turkey 3629 75 26 3528
Europe Netherlands Netherlands 3631 136 2 3493
# Interactive bar chart for the first 20 rows of sum.table: one facet per
# metric (every summary column except Active), bars filled by continent.
plot1 <- (
  sum.table %>%
    mutate(
      # Rows with an empty Province.State are labelled with the country name.
      Province.State = ifelse(
        Province.State == "",
        as.character(Country.Region),
        as.character(Province.State)
      )
    ) %>%
    select(-Active) %>%
    head(20) %>%
    # Long format: one row per (region, metric) pair.
    pivot_longer(
      cols = -c(Country.Region, Province.State, Continent),
      names_to = "Metric",
      values_to = "Amount"
    ) %>%
    ggplot(aes(x = reorder(Province.State, Amount), y = Amount, fill = Continent)) +
    geom_col(colour = "black") +
    facet_wrap(~ Metric) +
    coord_flip() +
    theme_minimal()
) %>%
  ggplotly()
plot1
# Interactive bar chart for the first 20 rows of sum.table with the metrics
# drawn on top of each other (position = "identity") and filled by metric.
plot2 <- (
  sum.table %>%
    mutate(
      # Rows with an empty Province.State are labelled with the country name.
      Province.State = ifelse(
        Province.State == "",
        as.character(Country.Region),
        as.character(Province.State)
      )
    ) %>%
    select(-Active) %>%
    head(20) %>%
    # Long format: one row per (region, metric) pair.
    pivot_longer(
      cols = -c(Country.Region, Province.State, Continent),
      names_to = "Metric",
      values_to = "Amount"
    ) %>%
    ggplot(aes(x = reorder(Province.State, Amount), y = Amount)) +
    geom_col(aes(fill = Metric), position = "identity") +
    coord_flip() +
    theme_minimal()
) %>%
  ggplotly()
plot2
# Static treemap of deaths for the first 10 rows of sum.table; tiles are
# sized by Deaths, filled by continent and laid out in continent subgroups.
ggplot(
  data = sum.table %>%
    mutate(
      # Rows with an empty Province.State are labelled with the country name.
      Province.State = ifelse(
        Province.State == "",
        as.character(Country.Region),
        as.character(Province.State)
      )
    ) %>%
    select(-Active) %>%
    head(10),
  mapping = aes(
    area = Deaths,
    label = paste0(Province.State, ":\n", Deaths),
    fill = Continent,
    subgroup = Continent
  )
) +
  geom_treemap() +
  geom_treemap_text(colour = "white", place = "topleft") +
  coord_fixed() +
  ggtitle("Number of deaths by country\nin top 10 countries with more active cases")

Number of confirmed cases in top 10 regions with most active cases

# Static treemap of confirmed cases for the first 10 rows of sum.table; tiles
# are sized by Confirmed, filled by continent and laid out in continent
# subgroups.
ggplot(
  data = sum.table %>%
    mutate(
      # Rows with an empty Province.State are labelled with the country name.
      Province.State = ifelse(
        Province.State == "",
        as.character(Country.Region),
        as.character(Province.State)
      )
    ) %>%
    head(10),
  mapping = aes(
    area = Confirmed,
    label = paste0(Province.State, ":\n", Confirmed),
    fill = Continent,
    subgroup = Continent
  )
) +
  geom_treemap() +
  geom_treemap_text(colour = "white", place = "topleft") +
  coord_fixed() +
  ggtitle("Number of confirmed cases by country\nin top 10 countries with more active cases")

# Make the region label permanent: from here on, rows with an empty
# Province.State carry the country name in that column.
sum.table <- sum.table %>%
  mutate(
    Province.State = ifelse(
      Province.State == "",
      as.character(Country.Region),
      as.character(Province.State)
    )
  )

# Build the labels/parent tables used by the plotly treemaps.
df.plotly <- sum.table
# Regions without a continent match are collected under "Missing".
df.plotly$Continent <- replace(
  df.plotly$Continent,
  is.na(df.plotly$Continent),
  "Missing"
)

# Continent level: totals per continent; the empty parent string puts these
# rows at the top of the hierarchy.
conts <- df.plotly %>%
  group_by(Continent) %>%
  summarise(
    Deaths = sum(Deaths),
    Confirmed = sum(Confirmed),
    Recovered = sum(Recovered),
    Active = sum(Active)
  ) %>%
  rename(labels = Continent) %>%
  mutate(parent = "")

# Country level: totals per country, with the continent as parent.
countrs <- df.plotly %>%
  group_by(Country.Region) %>%
  summarise(
    Deaths = sum(Deaths),
    Confirmed = sum(Confirmed),
    Recovered = sum(Recovered),
    Active = sum(Active),
    parent = unique(Continent)
  ) %>%
  rename(labels = Country.Region)

# Region level: one row per province/state, with the country as parent.
# The parent column is moved to the last position. This table is built here
# but is not part of the combined table below.
regs <- df.plotly %>%
  ungroup() %>%
  select(-Continent) %>%
  rename(parent = Country.Region, labels = Province.State)
regs <- regs[, c(2:6, 1)]

# Continents + countries, restricted to entries with more than 100 active cases.
df.plotly <- rbind(conts, countrs)
toPlot <- df.plotly %>% filter(Active > 100)

# Treemap of deaths: continents at the top level, countries nested inside.
# The continent rows of toPlot already contain the sum over their countries,
# so branchvalues = "total" is required. With plotly's default ("remainder")
# a parent's value is treated as an extra on top of its children, which
# counts every continent roughly twice.
plot4 <- plot_ly(
  type = "treemap",
  values = toPlot$Deaths,
  labels = toPlot$labels,
  parents = toPlot$parent,
  branchvalues = "total"
  # textinfo="label+value+percent parent+percent entry+percent root",
  # domain=list(column=0)
)
plot4
# Treemap of confirmed cases: continents at the top level, countries nested
# inside. The continent rows of toPlot already contain the sum over their
# countries, so branchvalues = "total" is required. With plotly's default
# ("remainder") a parent's value is treated as an extra on top of its
# children, which counts every continent roughly twice.
plot4 <- plot_ly(
  type = "treemap",
  values = toPlot$Confirmed,
  labels = toPlot$labels,
  parents = toPlot$parent,
  branchvalues = "total"
  # textinfo="label+value+percent parent+percent entry+percent root",
  # domain=list(column=0)
)
plot4
# Treemap of recoveries: continents at the top level, countries nested
# inside. The continent rows of toPlot already contain the sum over their
# countries, so branchvalues = "total" is required. With plotly's default
# ("remainder") a parent's value is treated as an extra on top of its
# children, which counts every continent roughly twice.
plot4 <- plot_ly(
  type = "treemap",
  values = toPlot$Recovered,
  labels = toPlot$labels,
  parents = toPlot$parent,
  branchvalues = "total"
  # textinfo="label+value+percent parent+percent entry+percent root",
  # domain=list(column=0)
)
plot4
# Treemap sized by deaths per confirmed case for every entry of toPlot.
# NOTE(review): ratios are not additive across the continent/country levels,
# so tile areas should be read with care - confirm this is the intended view.
plot4 <- plot_ly(
  parents = toPlot[["parent"]],
  labels = toPlot[["labels"]],
  values = toPlot[["Deaths"]] / toPlot[["Confirmed"]],
  type = "treemap"
  # textinfo="label+value+percent parent+percent entry+percent root",
  # domain=list(column=0)
)
plot4
# Treemap sized by recoveries per confirmed case for every entry of toPlot.
# NOTE(review): ratios are not additive across the continent/country levels,
# so tile areas should be read with care - confirm this is the intended view.
plot4 <- plot_ly(
  parents = toPlot[["parent"]],
  labels = toPlot[["labels"]],
  values = toPlot[["Recovered"]] / toPlot[["Confirmed"]],
  type = "treemap"
  # textinfo="label+value+percent parent+percent entry+percent root",
  # domain=list(column=0)
)
plot4